import os
import subprocess

# Experiment parameters.
env = "Hanabi"
hanabi = "Hanabi-Full"
num_agents = 2
algo = "mappo"
exp = "check"
seed_max = 1

# File-descriptor limit: `ulimit` is not needed on Windows.
# The limit that `ulimit -n 22222` would set is not really relevant on
# Windows, so it is simply skipped here.


def build_command(seed):
    """Return the argv list that launches one training run for *seed*.

    Every element is a string so the list can be handed directly to
    ``subprocess.run`` without going through a shell.
    """
    return [
        "python", "train/train_hanabi_forward.py",
        "--env_name", env,
        "--algorithm_name", algo,
        "--experiment_name", exp,
        "--hanabi_name", hanabi,
        "--num_agents", str(num_agents),
        "--seed", str(seed),
        "--n_training_threads", "1",
        "--n_rollout_threads", "1000",
        "--num_mini_batch", "1",
        "--episode_length", "100",
        "--num_env_steps", "10000000000000",
        "--ppo_epoch", "15",
        "--gain", "0.01",
        "--lr", "7e-4",
        "--critic_lr", "1e-3",
        "--hidden_size", "512",
        "--layer_N", "2",
        "--entropy_coef", "0.015",
    ]


def status_message(returncode):
    """Return the line to print after a run that exited with *returncode*.

    A zero exit status yields the usual completion message; anything else
    yields a failure message that includes the exit status, so a crashed
    run is no longer reported as done.
    """
    if returncode == 0:
        return "training is done!"
    return f"training failed with exit code {returncode}!"


def main():
    """Launch one training run per seed and return how many of them failed.

    A failing run does not stop the remaining seeds from being attempted.
    """
    # Print the experiment configuration.
    print(
        f"env is {env}, algo is {algo}, exp is {exp}, max seed is {seed_max}",
        flush=True,
    )

    # Set CUDA_VISIBLE_DEVICES=0; child processes inherit it. The value is
    # the same for every seed, so it is set once before the loop.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    failures = 0
    # Run training once for each seed in 1..seed_max.
    for seed in range(1, seed_max + 1):
        # Flush so this line appears before the child's output even when
        # stdout is redirected to a file or pipe.
        print(f"seed is {seed}:", flush=True)

        result = subprocess.run(build_command(seed))

        print(status_message(result.returncode), flush=True)
        if result.returncode != 0:
            failures += 1
    return failures


if __name__ == "__main__":
    # Exit non-zero when any run failed so callers can detect the failure.
    if main():
        raise SystemExit(1)
